home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
-
- # ====================================================================
- # Copyright (c) 1995-1999 The Apache Group. All rights reserved.
- #
- # Redistribution and use in source and binary forms, with or without
- # modification, are permitted provided that the following conditions
- # are met:
- #
- # 1. Redistributions of source code must retain the above copyright
- # notice, this list of conditions and the following disclaimer.
- #
- # 2. Redistributions in binary form must reproduce the above copyright
- # notice, this list of conditions and the following disclaimer in
- # the documentation and/or other materials provided with the
- # distribution.
- #
- # 3. All advertising materials mentioning features or use of this
- # software must display the following acknowledgment:
- # "This product includes software developed by the Apache Group
- # for use in the Apache HTTP server project (http://www.apache.org/)."
- #
- # 4. The names "Apache Server" and "Apache Group" must not be used to
- # endorse or promote products derived from this software without
- # prior written permission. For written permission, please contact
- # apache@apache.org.
- #
- # 5. Products derived from this software may not be called "Apache"
- # nor may "Apache" appear in their names without prior written
- # permission of the Apache Group.
- #
- # 6. Redistributions of any form whatsoever must retain the following
- # acknowledgment:
- # "This product includes software developed by the Apache Group
- # for use in the Apache HTTP server project (http://www.apache.org/)."
- #
- # THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
- # EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR
- # ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- # OF THE POSSIBILITY OF SUCH DAMAGE.
- # ====================================================================
- #
- # This software consists of voluntary contributions made by many
- # individuals on behalf of the Apache Group and was originally based
- # on public domain software written at the National Center for
- # Supercomputing Applications, University of Illinois, Urbana-Champaign.
- # For more information on the Apache Group and the Apache HTTP server
- # project, please see <http://www.apache.org/>.
-
- # logresolve.pl
- #
- # v 1.2 by robh @ imdb.com
- #
- # usage: logresolve.pl <infile >outfile
- #
- # input = Apache/NCSA/.. logfile with IP numbers at start of lines
- # output = same logfile with IP addresses resolved to hostnames where
- # name lookups succeeded.
- #
- # this differs from the C based 'logresolve' in that this script
- # spawns a number ($CHILDREN) of subprocesses to resolve addresses
- # concurrently and sets a short timeout ($TIMEOUT) for each lookup in
- # order to keep things moving quickly.
- #
- # the parent process handles caching of IP->hostnames using a Perl hash
- # it also avoids sending the same IP to multiple child processes to be
- # resolved multiple times concurrently.
- #
- # Depending on the settings of $CHILDREN and $TIMEOUT you should see
- # significant reductions in the overall time taken to resolve your
- # logfiles. With $CHILDREN=40 and $TIMEOUT=5 I've seen 200,000 - 300,000
- # logfile lines processed per hour compared to ~45,000 per hour
- # with 'logresolve'.
- #
- # I haven't yet seen any noticable reduction in the percentage of IPs
- # that fail to get resolved. Your mileage will no doubt vary. 5s is long
- # enough to wait IMO.
- #
- # Known to work with FreeBSD 2.2
- # Known to have problems with Solaris
- #
- # 980417 - use 'sockaddr_un' for bind/connect to make the script work
- # with linux. Fix from Luuk de Boer <luuk_de_boer@pi.net>
-
- require 5.004;
-
- $|=1;
-
- use FileHandle;
- use Socket;
-
- use strict;
- no strict 'refs';
-
- use vars qw($PROTOCOL);
- $PROTOCOL = 0;
-
- my $CHILDREN = 40;
- my $TIMEOUT = 5;
-
- my $filename;
- my %hash = ();
- my $parent = $$;
-
- my @children = ();
- for (my $child = 1; $child <=$CHILDREN; $child++) {
- my $f = fork();
- if (!$f) {
- $filename = "./.socket.$parent.$child";
- if (-e $filename) { unlink($filename) || warn "$filename .. $!\n";}
- &child($child);
- exit(0);
- }
- push(@children, $f);
- }
-
- &parent;
- &cleanup;
-
- ## remove all temporary files before shutting down
- sub cleanup {
- # die kiddies, die
- kill(15, @children);
- for (my $child = 1; $child <=$CHILDREN; $child++) {
- if (-e "./.socket.$parent.$child") {
- unlink("./.socket.$parent.$child")
- || warn ".socket.$parent.$child $!";
- }
- }
- }
-
- sub parent {
- # Trap some possible signals to trigger temp file cleanup
- $SIG{'KILL'} = $SIG{'INT'} = $SIG{'PIPE'} = \&cleanup;
-
- my %CHILDSOCK;
- my $filename;
-
- ## fork child processes. Each child will create a socket connection
- ## to this parent and use an unique temp filename to do so.
- for (my $child = 1; $child <=$CHILDREN; $child++) {
- $CHILDSOCK{$child}= FileHandle->new;
-
- if (!socket($CHILDSOCK{$child}, AF_UNIX, SOCK_STREAM, $PROTOCOL)) {
- warn "parent socket to child failed $!";
- }
- $filename = "./.socket.$parent.$child";
- my $response;
- do {
- $response = connect($CHILDSOCK{$child}, sockaddr_un($filename));
- if ($response != 1) {
- sleep(1);
- }
- } while ($response != 1);
- $CHILDSOCK{$child}->autoflush;
- }
- ## All child processes should now be ready or at worst warming up
-
- my (@buffer, $child, $ip, $rest, $hostname, $response);
- ## read the logfile lines from STDIN
- while(<STDIN>) {
- @buffer = (); # empty the logfile line buffer array.
- $child = 1; # children are numbered 1..N, start with #1
-
- # while we have a child to talk to and data to give it..
- do {
- push(@buffer, $_); # buffer the line
- ($ip, $rest) = split(/ /, $_, 2); # separate IP form rest
-
- unless ($hash{$ip}) { # resolve if unseen IP
- $CHILDSOCK{$child}->print("$ip\n"); # pass IP to next child
- $hash{$ip} = $ip; # don't look it up again.
- $child++;
- }
- } while (($child < ($CHILDREN-1)) and ($_ = <STDIN>));
-
- ## now poll each child for a response
- while (--$child > 0) {
- $response = $CHILDSOCK{$child}->getline;
- chomp($response);
- # child sends us back both the IP and HOSTNAME, no need for us
- # to remember what child received any given IP, and no worries
- # what order we talk to the children
- ($ip, $hostname) = split(/\|/, $response, 2);
- $hash{$ip} = $hostname;
- }
-
- # resolve all the logfiles lines held in the log buffer array..
- for (my $line = 0; $line <=$#buffer; $line++) {
- # get next buffered line
- ($ip, $rest) = split(/ /, $buffer[$line], 2);
- # separate IP from rest and replace with cached hostname
- printf STDOUT ("%s %s", $hash{$ip}, $rest);
- }
- }
- }
-
- ########################################
-
- sub child {
- # arg = numeric ID - how the parent refers to me
- my $me = shift;
-
- # add trap for alarm signals.
- $SIG{'ALRM'} = sub { die "alarmed"; };
-
- # create a socket to communicate with parent
- socket(INBOUND, AF_UNIX, SOCK_STREAM, $PROTOCOL)
- || die "Error with Socket: !$\n";
- $filename = "./.socket.$parent.$me";
- bind(INBOUND, sockaddr_un($filename))
- || die "Error Binding $filename: $!\n";
- listen(INBOUND, 5) || die "Error Listening: $!\n";
-
- my ($ip, $send_back);
- my $talk = FileHandle->new;
-
- # accept a connection from the parent process. We only ever have
- # have one connection where we exchange 1 line of info with the
- # parent.. 1 line in (IP address), 1 line out (IP + hostname).
- accept($talk, INBOUND) || die "Error Accepting: $!\n";
- # disable I/O buffering just in case
- $talk->autoflush;
- # while the parent keeps sending data, we keep responding..
- while(($ip = $talk->getline)) {
- chomp($ip);
- # resolve the IP if time permits and send back what we found..
- $send_back = sprintf("%s|%s", $ip, &nslookup($ip));
- $talk->print($send_back."\n");
- }
- }
-
- # perform a time restricted hostname lookup.
- sub nslookup {
- # get the IP as an arg
- my $ip = shift;
- my $hostname = undef;
-
- # do the hostname lookup inside an eval. The eval will use the
- # already configured SIGnal handler and drop out of the {} block
- # regardless of whether the alarm occured or not.
- eval {
- alarm($TIMEOUT);
- $hostname = gethostbyaddr(gethostbyname($ip), AF_INET);
- alarm(0);
- };
- if ($@ =~ /alarm/) {
- # useful for debugging perhaps..
- # print "alarming, isn't it? ($ip)";
- }
-
- # return the hostname or the IP address itself if there is no hostname
- $hostname ne "" ? $hostname : $ip;
- }
-
-
-